/*******************************************************************************
* Objective: Create urban shares based on data from IPUMS
*******************************************************************************/

* Pin command interpretation to Stata 16 so later releases run this file the same way
version 16

* Establish working directory **************************************************
* All relative paths below are resolved against the global macro $workdirectory
cd "$workdirectory"

* Session settings *************************************************************
* Do not pause output with --more-- prompts
set more off
* Close any log left open by a previous run (capture: no error if none is open)
capture log close
* Start from an empty session
clear all

********************************************************************************
**# Step 1: Importing and combining raw datasets
********************************************************************************

* Stage every regional extract except Fiji in its own tempfile.
foreach region in lac africa asia {
    import delimited "raw_datasets/IPUMS_labor/urban-rural/`region'_urbsh.csv", clear
    tempfile `region'_urbsh
    save ``region'_urbsh', replace
}

* Fiji is loaded last and stays in memory as the base of the stacked dataset.
* It must NOT also be appended from a tempfile: doing so would stack the Fiji
* rows onto themselves and duplicate every Fiji observation.
import delimited "raw_datasets/IPUMS_labor/urban-rural/fiji_urbsh.csv", clear

* Stack the remaining regions below Fiji (row order: Fiji, LAC, Africa, Asia).
append using `lac_urbsh'
append using `africa_urbsh'
append using `asia_urbsh'

* These two columns of the raw extracts are not used further on.
drop type_var rowtotal

********************************************************************************
**# Step 2: Format Data
********************************************************************************

* Generating country and year variable
* Remove the column-total row(s) of the raw tables
drop if census == "COL TOTAL"

* Year: the four digits that end the census label (kept as a string)
gen year = regexs(0) if regexm(census, "[0-9][0-9][0-9][0-9]$")

* Country: everything that precedes the trailing four-digit year, trimmed.
* The earlier pattern kept at most two alphabetic words, so a label with a
* longer name (e.g. three words) or with punctuation would have been truncated.
* NOTE(review): assumes labels have the form "<country name> <YYYY>" - confirm
* against the raw CSVs.
gen country = strtrim(regexs(1)) if regexm(census, "^(.+)[0-9][0-9][0-9][0-9]$")

* Fallback for labels without a trailing year: previous two-word extraction
replace country = regexs(0) if regexm(census, "(([a-zA-Z]+)[ ]*([a-zA-Z]+))") & missing(country)

drop census

* Merging Sudan
* Add South Sudan's rural/urban counts to the Sudan row of the same census
* year. Rows are located by country name and year instead of by fixed
* observation numbers, which silently point at the wrong rows as soon as the
* raw files gain, lose or reorder a line.
* NOTE(review): assumes the previously hard-coded observations 129 and 130
* were the Sudan and South Sudan rows of one census year - confirm.
* The South Sudan rows themselves are left in place here.
tempname ssd_total
quietly levelsof year if country == "South Sudan", local(ssd_years)
if `"`ssd_years'"' == "" {
    display as text "note: no South Sudan rows found; Sudan counts left unchanged"
}
foreach yr of local ssd_years {
    foreach v of varlist rural urban {
        * Sum of South Sudan's count for this year (no sorting, row order is kept)
        quietly summarize `v' if country == "South Sudan" & year == "`yr'", meanonly
        scalar `ssd_total' = r(sum)
        replace `v' = `v' + `ssd_total' if country == "Sudan" & year == "`yr'"
    }
}

* Urban share, in percent, of the population classified as rural or urban
generate urbsh = 100 * (urban / (rural + urban))
label variable urbsh "Urban/(Rural+Urban), doesn't consider 'Unknown'"

* Making dataset compatible with main dataset

* Countries removed from this extract
drop if inlist(country, "Armenia", "Palestine", "Saint Lucia", "Kyrgyz Republic", "South Sudan")

* Country names rewritten (presumably to the spelling used by the main dataset)
replace country = "Venezuela, RB"      if country == "Venezuela"
replace country = "Iran, Islamic Rep." if country == "Iran"
replace country = "Egypt, Arab Rep."   if country == "Egypt"

********************************************************************************
**# Step 3: Save
********************************************************************************
* Write the combined urban-share dataset, overwriting any earlier version
local outfile "processed_datasets/allregions_urbsh"
save "`outfile'", replace
